library(brms)
library(data.table)
library(texreg)

# SET WORKING DIRECTORY
setwd("C:/Users/drmil/Dropbox/White House Lobbying/3YP/APSR Final/Replication Archive/visitor_logs_analyses")

source("extract_brmsfit.R")

################################################################################

# PAGE SI.12, FOOTNOTE SI.23
# COMPARISON OF OBAMA ADMINISTRATION ENGAGEMENT MEASURE INCLUDING AND EXCLUDING
# TOURS

obama_logs_analysis <- fread("obama_logs_analysis.csv", header = TRUE, stringsAsFactors = FALSE)

# NUMBER AND PERCENTAGES OF OBSERVATIONS WITH SAME/DIFFERENT VALUES OF ENGAGEMENT
# DURING THE OBAMA ADMINISTRATION GIVEN WHETHER TOURS ARE INCLUDED
table(obama_logs_analysis$any_visits_leq0, 
      obama_logs_analysis$any_visits_notours_leq0)
round(prop.table(table(obama_logs_analysis$any_visits_leq0, 
                        obama_logs_analysis$any_visits_notours_leq0)), 3)*100

# CORRELATION COEFFICIENT FOR THE TWO MEASURES OF ENGAGEMENT
cor(obama_logs_analysis$any_visits_leq0, 
    obama_logs_analysis$any_visits_notours_leq0)

################################################################################

# TABLE SI.6 (MAIN PAPER MODELS)

load("tableSI6_col1.RData")
load("tableSI6_col2.RData")

texreg(l=list(extract.brmsfit(tableSI6_col1),
              extract.brmsfit(tableSI6_col2)),        
       longtable=TRUE, custom.model.names=c("Clinton WH",
                                            "Obama WH"),
       use.packages = TRUE, caption = "Main Paper Models",
       caption.above=TRUE, label = "table:main-paper-models-all",
       stars = 0,
       file="tableSI6.tex",
       custom.note = "This table provides the summaries of the models used to
       generate the predicted probabilities of presidential engagement during the 
       Clinton and Obama administrations presented in the main text.  The cell 
       entry for each covariate presents the posterior mean and 95\\%  credible 
       interval from the Bayesian multilevel logistic regression model using 
       data from the presidency indicated by the column heading.  Coefficients 
       denoted with $^{*}$ are those whose 95\\% credible intervals do not 
       include zero.  The models are fitted with the R package \\texttt{brms},   
       which interfaces with \\texttt{rstan} to perform estimation using the NUTS 
       (No-U-Turn Sampler) algorithm in  Stan.  Each model is fitted with 4 chains
       for 2000 iterations per chain (1000 iterations for warm-up, 1000 iterations
       for sampling).  The table omits the following observation-level control 
       variables: whether an organized interest employs lobbyists of its own and
       whether an organized interest indicates lobbying on each of 80 issues on
       its LDA filing in the previous time period.",
       custom.coef.map = list("Intercept"="Intercept",
                              "any_visits_leq0_1l"="Any Visits_{t-1}",
                              "loglobbyexp_1lP1"="log(Lobbying Expenditures)$_{t-1}$",
                              "made_donations_l1tol4"="Made Natl Contributions$_{t-1\\mbox{ to } t-4}$",
                              "made_donations_l1tol4:logamt_donations_1ltol4P1"="Made Natl Contributions$_{t-1\\mbox{ to } t-4}$:log(Contribution Amount$_{t-1\\mbox{ to } t-4}$)",
                              "made_donations_l1tol8"="Made Natl Contributions$_{t-1\\mbox{ to } t-8}$",
                              "made_donations_l1tol8:logamt_donations_1ltol8P1"="Made Natl Contributions$_{t-1\\mbox{ to } t-8}$:log(Contribution Amount$_{t-1\\mbox{ to } t-8}$)",
                              "industry_pidI"="Aligned with Neither Party",
                              "industry_pidR"="Aligned with Republicans"),
       custom.gof.names = c("$\\sigma_{organization}$",
                            "$\\sigma_{industry}$",
                            "$\\sigma_{timeperiod}$",
                            "Num. obs."))

# how many OIs?
length(unique(tableSI6_col1$data$crp_orgname))
length(unique(tableSI6_col2$data$crp_orgname))
# how many industries?
length(unique(tableSI6_col1$data$crp_industry))
length(unique(tableSI6_col2$data$crp_industry))
# how many time periods?
length(unique(tableSI6_col1$data$yearperiod))
length(unique(tableSI6_col2$data$yearperiod))

################################################################################

# TABLE SI.7 (OBAMA WITH AND WITHOUT TOURS)

load("tableSI6_col2.RData")
load("tableSI7_col2.RData")

texreg(l=list(extract.brmsfit(tableSI6_col2),
              extract.brmsfit(tableSI7_col2)),        
       longtable=TRUE, custom.model.names=c("All Visits",
                                            "Excluding Tours"),
       use.packages = TRUE, caption = "Obama White House\\textemdash Comparing Inclusion/Exclusion of Tour Visits",
       caption.above=TRUE, label = "table:tour-notour-comparison-obama",
       stars = 0,
       file="tableSI7.tex",
       custom.note = "This table compares the model summaries obtained using data
       from the Obama administration when utilizing all White House visits to identify
       instances of presidential engagement (first column) versus using only those
       visits not identified as tours using the Description column in the Obama era
       WAVES records (second column).  The cell entry for each covariate presents 
       the posterior mean and 95\\%  credible 
       interval from the Bayesian multilevel logistic regression model using 
       data from the presidency indicated by the column heading.  Coefficients 
       denoted with $^{*}$ are those whose 95\\% credible intervals do not 
       include zero.  The models are fitted with the R package \\texttt{brms},   
       which interfaces with \\texttt{rstan} to perform estimation using the NUTS 
       (No-U-Turn Sampler) algorithm in  Stan.  Each model is fitted with 4 chains
       for 2000 iterations per chain (1000 iterations for warm-up, 1000 iterations
       for sampling).  The table omits the following observation-level control 
       variables: whether an organized interest employs lobbyists of its own and
       whether an organized interest indicates lobbying on each of 80 issues on
       its LDA filing in the previous time period.",
       custom.coef.map = list("Intercept"="Intercept",
                              "any_visits_leq0_1l"="Any Visits_{t-1}",
                              "any_visits_notours_leq0_1l"="Any Visits_{t-1}",
                              "loglobbyexp_1lP1"="log(Lobbying Expenditures)$_{t-1}$",
                              "made_donations_l1tol4"="Made Natl Contributions$_{t-1\\mbox{ to } t-4}$",
                              "made_donations_l1tol4:logamt_donations_1ltol4P1"="Made Natl Contributions$_{t-1\\mbox{ to } t-4}$:log(Contribution Amount$_{t-1\\mbox{ to } t-4}$)",
                              "made_donations_l1tol8"="Made Natl Contributions$_{t-1\\mbox{ to } t-8}$",
                              "made_donations_l1tol8:logamt_donations_1ltol8P1"="Made Natl Contributions$_{t-1\\mbox{ to } t-8}$:log(Contribution Amount$_{t-1\\mbox{ to } t-8}$)",
                              "industry_pidI"="Aligned with Neither Party",
                              "industry_pidR"="Aligned with Republicans"),
       custom.gof.names = c("$\\sigma_{organization}$",
                            "$\\sigma_{industry}$",
                            "$\\sigma_{timeperiod}$",
                            "Num. obs."))

# how many OIs?
length(unique(tableSI6_col2$data$crp_orgname))
length(unique(tableSI7_col2$data$crp_orgname))
# how many industries?
length(unique(tableSI6_col2$data$crp_industry))
length(unique(tableSI7_col2$data$crp_industry))
# how many time periods?
length(unique(tableSI6_col2$data$yearperiod))
length(unique(tableSI7_col2$data$yearperiod))

################################################################################

# TABLE SI.8 (ENGAGEMENT MEASURED USING APPROXIMATE STRING MATCHING)

# NOTE: THE OUTPUTTED TABLE WAS FURTHER EDITED FOR PRESENTATION IN THE
# SUPPLEMENTAL INFORMATION (NAMELY, ADDITIONAL MULTICOLUMN HEADINGS FOR
# EACH PRESIDENCY)

load("tableSI8_col1.RData")
load("tableSI8_col2.RData")
load("tableSI8_col3.RData")
load("tableSI8_col4.RData")
load("tableSI8_col5.RData")
load("tableSI8_col6.RData")

texreg(l=list(extract.brmsfit(tableSI8_col1),
              extract.brmsfit(tableSI8_col2),
              extract.brmsfit(tableSI8_col3),
              extract.brmsfit(tableSI8_col4),
              extract.brmsfit(tableSI8_col5),
              extract.brmsfit(tableSI8_col6)),        
       longtable=TRUE, custom.model.names=c("$\\leq$ 1 Edit",
                                            "$\\leq$ 2 Edits",
                                            "$\\leq$ 3 Edits",
                                            "$\\leq$ 1 Edit",
                                            "$\\leq$ 2 Edits",
                                            "$\\leq$ 3 Edits"),
       use.packages = TRUE, 
       caption = "Main Paper Models with Different String Matching Tolerance Thresholds",
       caption.above=TRUE, label = "table:binary-nointer-clinton-obama-threshold",
       stars = 0,
       file="tableSI8.tex",
       custom.note = "The cell entry for each covariate presents the posterior 
       mean and 95\\%  credible interval from the Bayesian multilevel logistic 
       regression model using data from the Clinton and Obama presidencies with 
       the strong matching tolerance threshold indicated by the column heading.
       indicated by the column heading.  Coefficients denoted with $^{*}$ are 
       those whose 95\\% credible intervals do not include zero.  The models are
       fitted with the R package \\texttt{brms}, which interfaces with 
       \\texttt{rstan} to perform estimation using the NUTS (No-U-Turn Sampler) 
       algorithm in Stan.  Each model is fitted with 4 chains for 2000 
       iterations per chain (1000 iterations for warm-up, 1000 iterations for
       sampling).  The table omits the following observation-level control 
       variables: whether an organized interest employs lobbyists of its own and
       whether an organized interest indicates lobbying on each of 80 issues on
       its LDA filing in the previous time period.",
       custom.coef.map = list("Intercept"="Intercept",
                              "any_visits_leq1_1l"="Any Visits_{t-1}",
                              "any_visits_leq2_1l"="Any Visits_{t-1}",
                              "any_visits_leq3_1l"="Any Visits_{t-1}",
                              "loglobbyexp_1lP1"="log(Lobbying Expenditures)$_{t-1}$",
                              "made_donations_l1tol4"="Made Natl Contributions$_{t-1\\mbox{ to } t-4}$",
                              "made_donations_l1tol4:logamt_donations_1ltol4P1"="Made Natl Contributions$_{t-1\\mbox{ to } t-4}$:log(Contribution Amount$_{t-1\\mbox{ to } t-4}$)",
                              "made_donations_l1tol8"="Made Natl Contributions$_{t-1\\mbox{ to } t-8}$",
                              "made_donations_l1tol8:logamt_donations_1ltol8P1"="Made Natl Contributions$_{t-1\\mbox{ to } t-8}$:log(Contribution Amount$_{t-1\\mbox{ to } t-8}$)",
                              "industry_pidI"="Aligned with Neither Party",
                              "industry_pidR"="Aligned with Republicans"
       ),
       custom.gof.names = c("$\\sigma_{organization}$",
                            "$\\sigma_{industry}$",
                            "$\\sigma_{timeperiod}$",
                            "Num. obs."))

# how many OIs?
length(unique(tableSI8_col1$data$crp_orgname))
length(unique(tableSI8_col2$data$crp_orgname))
length(unique(tableSI8_col3$data$crp_orgname))
length(unique(tableSI8_col4$data$crp_orgname))
length(unique(tableSI8_col5$data$crp_orgname))
length(unique(tableSI8_col6$data$crp_orgname))
# how many industries?
length(unique(tableSI8_col1$data$crp_industry))
length(unique(tableSI8_col2$data$crp_industry))
length(unique(tableSI8_col3$data$crp_industry))
length(unique(tableSI8_col4$data$crp_industry))
length(unique(tableSI8_col5$data$crp_industry))
length(unique(tableSI8_col6$data$crp_industry))
# how many time periods?
length(unique(tableSI8_col1$data$yearperiod))
length(unique(tableSI8_col2$data$yearperiod))
length(unique(tableSI8_col3$data$yearperiod))
length(unique(tableSI8_col4$data$yearperiod))
length(unique(tableSI8_col5$data$yearperiod))
length(unique(tableSI8_col6$data$yearperiod))

################################################################################

# TABLE SI.9 (COUNT OUTCOMES)

load("tableSI9_col1.RData")
load("tableSI9_col2.RData")

texreg(l=list(extract.brmsfit(tableSI9_col1),
              extract.brmsfit(tableSI9_col2)),        
       longtable=TRUE, 
       custom.model.names=c("Clinton","Obama"),
       use.packages = TRUE, caption = "Main Paper Models with Count Outcomes",
       caption.above=TRUE, label = "table:main-paper-count-models-all",
       stars = 0,
       file="tableSI9.tex",
       custom.note = "This table provides the summaries of models analogous to 
       those used to generate the predicted probabilities of presidential 
       engagement during the Clinton and Obama administrations presented in the
       main text, but which use counts of the number of times presidents engage 
       with each interest in the specified time period rather than a binary 
       indicator of engagement.  The cell entry for each  covariate presents the
       posterior mean and 95\\%  credible interval from the Bayesian multilevel 
       negative binomial regression model using data from the presidency 
       indicated by the column heading.  Coefficients denoted with $^{*}$ 
       are those whose 95\\% credible intervals do not include zero.  The models
       are fitted with the R package \\texttt{brms}, which interfaces with 
       \\texttt{rstan} to perform estimation using the NUTS (No-U-Turn Sampler) 
       algorithm in Stan.  Each model is fitted with 4 chains for 2000 iterations
       per chain (1000 iterations for warm-up, 1000 iterations for sampling).  
       The table omits the following observation-level control variables: whether
       an organized interest employs lobbyists of its own and whether an organized 
       interest indicates lobbying on each of 80 issues on its LDA filing in the 
       previous time period.",
custom.coef.map = list("Intercept"="Intercept",
                       "num_visits_leq0_1l"="Num Visits_{t-1}",
                       "loglobbyexp_1lP1"="log(Lobbying Expenditures)$_{t-1}$",
                       "made_donations_l1tol4"="Made Natl Contributions$_{t-1\\mbox{ to } t-4}$",
                       "made_donations_l1tol4:logamt_donations_1ltol4P1"="Made Natl Contributions$_{t-1\\mbox{ to } t-4}$:log(Contribution Amount$_{t-1\\mbox{ to } t-4}$)",
                       "made_donations_l1tol8"="Made Natl Contributions$_{t-1\\mbox{ to } t-8}$",
                       "made_donations_l1tol8:logamt_donations_1ltol8P1"="Made Natl Contributions$_{t-1\\mbox{ to } t-8}$:log(Contribution Amount$_{t-1\\mbox{ to } t-8}$)",
                       "industry_pidI"="Aligned with Neither Party",
                       "industry_pidR"="Aligned with Republicans"),
custom.gof.names = c("$\\sigma_{organization}$",
                     "$\\sigma_{industry}$",
                     "$\\sigma_{timeperiod}$",
                     "Num. obs."))

# how many OIs?
length(unique(tableSI9_col1$data$crp_orgname))
length(unique(tableSI9_col2$data$crp_orgname))
# how many industries?
length(unique(tableSI9_col1$data$crp_industry))
length(unique(tableSI9_col2$data$crp_industry))
# how many time periods?
length(unique(tableSI9_col1$data$yearperiod))
length(unique(tableSI9_col2$data$yearperiod))

################################################################################

# TABLE SI.10 (ALTERNATIVE PREFERENCE MEASURES)

load("tableSI10_col1.RData")
load("tableSI10_col2.RData")
load("tableSI10_col3.RData")
load("tableSI10_col4.RData")

texreg(l=list(extract.brmsfit(tableSI10_col1),
              extract.brmsfit(tableSI10_col2),
              extract.brmsfit(tableSI10_col3),
              extract.brmsfit(tableSI10_col4)),       
       longtable=TRUE, custom.model.names=c("CFscore",
                                            "IGscore",
                                            "CFscore",
                                            "IGscore"),
       use.packages = TRUE, 
       caption = "Main Paper Models with Alternative Preference Measures",
       caption.above=TRUE, label = "table:main-paper-pref-models-all",
       stars = 0,
       file="tableSI10.tex",
       custom.note = "This table provides the summaries of models analogous to 
       those used to generate the predicted probabilities of presidential 
       engagement during the Clinton and Obama administrations presented in the 
       main text, but which use alternative measures of organized interests' 
       preferences.  The cell entry for each covariate presents the posterior 
       mean and 95\\%  credible interval from the Bayesian multilevel logistic 
       regression model using data from the presidency indicated by the column 
       heading.  Coefficients denoted with $^{*}$ are those whose 95\\% credible
       intervals do not include zero.  The models are fitted with the R package 
       \\texttt{brms},   which interfaces with \\texttt{rstan} to perform 
       estimation using the NUTS (No-U-Turn Sampler) algorithm in Stan.  Each 
       model is fitted with 4 chains for 2000 iterations per chain (1000 
       iterations for warm-up, 1000 iterations for sampling).  The table omits 
       the following observation-level control variables: whether an organized 
       interest employs lobbyists of its own and whether an organized interest
       indicates lobbying on each of 80 issues on its LDA filing in the previous 
       time period.",
       custom.coef.map = list("Intercept"="Intercept",
                              "any_visits_leq0_1l"="Any Visits_{t-1}",
                              "loglobbyexp_1lP1"="log(Lobbying Expenditures)$_{t-1}$",
                              "made_donations_l1tol4"="Made Natl Contributions$_{t-1\\mbox{ to } t-4}$",
                              "made_donations_l1tol4:logamt_donations_1ltol4P1"="Made Natl Contributions$_{t-1\\mbox{ to } t-4}$:log(Contribution Amount$_{t-1\\mbox{ to } t-4}$)",
                              "made_donations_l1tol8"="Made Natl Contributions$_{t-1\\mbox{ to } t-8}$",
                              "made_donations_l1tol8:logamt_donations_1ltol8P1"="Made Natl Contributions$_{t-1\\mbox{ to } t-8}$:log(Contribution Amount$_{t-1\\mbox{ to } t-8}$)",
                              "cfscore"="CFScore",
                              "igscore"="IGScore"
       ),
       custom.gof.names = c("$\\sigma_{organization}$",
                            "$\\sigma_{industry}$",
                            "$\\sigma_{timeperiod}$",
                            "Num. obs."))

# how many OIs?
length(unique(tableSI10_col1$data$crp_orgname))
length(unique(tableSI10_col2$data$crp_orgname))
length(unique(tableSI10_col3$data$crp_orgname))
length(unique(tableSI10_col4$data$crp_orgname))
# how many industries?
length(unique(tableSI10_col1$data$crp_industry))
length(unique(tableSI10_col2$data$crp_industry))
length(unique(tableSI10_col3$data$crp_industry))
length(unique(tableSI10_col4$data$crp_industry))
# how many time periods?
length(unique(tableSI10_col1$data$yearperiod))
length(unique(tableSI10_col2$data$yearperiod))
length(unique(tableSI10_col3$data$yearperiod))
length(unique(tableSI10_col4$data$yearperiod))

################################################################################

# TABLE SI.11 (ENGAGEMENT QUALITY)

# NOTE: THERE IS NO EASY WAY TO ADAPT texreg TO ACCOMMODATE MULTIVARIATE (I.E.,
# MULTIPLE OUTCOME) brmsfit OBJECTS, SO THIS TABLE WAS CONSTRUCTED MANUALLY
# USING INFORMATION FROM THE MODEL SUMMARIES

# CHANGE max.print TO ALLOW ENTIRE MODEL SUMMARY TO PRINT

options(max.print = 999999)

load("tableSI11_cols1and2.RData")
load("tableSI11_cols3and4.RData")

summary(tableSI11_cols1and2)
# COEFFICIENTS OF INTEREST FOR TABLE ARE:
### anyvisitshighleq0_Intercept
### anyvisitslowleq0_Intercept
### anyvisitshighleq0_any_visits_high_leq0_1l
### anyvisitshighleq0_any_visits_low_leq0_1l
### anyvisitshighleq0_loglobbyexp_1lP1
### anyvisitshighleq0_made_donations_l1tol4
### anyvisitshighleq0_industry_pidI
### anyvisitshighleq0_industry_pidR
### anyvisitshighleq0_made_donations_l1tol4:logamt_donations_1ltol4P1
### anyvisitslowleq0_any_visits_high_leq0_1l
### anyvisitslowleq0_any_visits_low_leq0_1l
### anyvisitslowleq0_loglobbyexp_1lP1
### anyvisitslowleq0_made_donations_l1tol4
### anyvisitslowleq0_industry_pidI
### anyvisitslowleq0_industry_pidR
### anyvisitslowleq0_made_donations_l1tol4:logamt_donations_1ltol4P1
# GROUP-LEVEL EFFECTS OF INTEREST FOR TABLE PRINTED AT TOP OF SUMMARY:
### ~crp_industry, sd(anyvisitshighleq0_Intercept)
### ~crp_industry, sd(anyvisitslowleq0_Intercept)
### ~crp_industry, cor(anyvisitshighleq0_Intercept,anyvisitslowleq0_Intercept)
### ~crp_orgname:crp_industry, sd(anyvisitshighleq0_Intercept)
### ~crp_orgname:crp_industry, sd(anyvisitslowleq0_Intercept)
### ~crp_orgname:crp_industry, cor(anyvisitshighleq0_Intercept,anyvisitslowleq0_Intercept)
### ~yearperiod, sd(anyvisitshighleq0_Intercept)
### ~yearperiod, sd(anyvisitslowleq0_Intercept)
### ~yearperiod, cor(anyvisitshighleq0_Intercept,anyvisitslowleq0_Intercept)
# NUMBER OF OBSERVATIONS AND NUMBER OF UNIQUE GROUPINGS ALSO PRINTED AT TOP OF
# SUMMARY

summary(tableSI11_cols3and4)
# COEFFICIENTS OF INTEREST FOR TABLE ARE:
### anyvisitshighleq0_Intercept
### anyvisitslowleq0_Intercept
### anyvisitshighleq0_any_visits_high_leq0_1l
### anyvisitshighleq0_any_visits_low_leq0_1l
### anyvisitshighleq0_loglobbyexp_1lP1
### anyvisitshighleq0_made_donations_l1tol8
### anyvisitshighleq0_industry_pidI
### anyvisitshighleq0_industry_pidR
### anyvisitshighleq0_made_donations_l1tol8:logamt_donations_1ltol8P1
### anyvisitslowleq0_any_visits_high_leq0_1l
### anyvisitslowleq0_any_visits_low_leq0_1l
### anyvisitslowleq0_loglobbyexp_1lP1
### anyvisitslowleq0_made_donations_l1tol8
### anyvisitslowleq0_industry_pidI
### anyvisitslowleq0_industry_pidR
### anyvisitslowleq0_made_donations_l1tol8:logamt_donations_1ltol8P1
# GROUP-LEVEL EFFECTS OF INTEREST FOR TABLE PRINTED AT TOP OF SUMMARY:
### ~crp_industry, sd(anyvisitshighleq0_Intercept)
### ~crp_industry, sd(anyvisitslowleq0_Intercept)
### ~crp_industry, cor(anyvisitshighleq0_Intercept,anyvisitslowleq0_Intercept)
### ~crp_orgname:crp_industry, sd(anyvisitshighleq0_Intercept)
### ~crp_orgname:crp_industry, sd(anyvisitslowleq0_Intercept)
### ~crp_orgname:crp_industry, cor(anyvisitshighleq0_Intercept,anyvisitslowleq0_Intercept)
### ~yearperiod, sd(anyvisitshighleq0_Intercept)
### ~yearperiod, sd(anyvisitslowleq0_Intercept)
### ~yearperiod, cor(anyvisitshighleq0_Intercept,anyvisitslowleq0_Intercept)
# NUMBER OF OBSERVATIONS AND NUMBER OF UNIQUE GROUPINGS ALSO PRINTED AT TOP OF
# SUMMARY